home *** CD-ROM | disk | FTP | other *** search
- * Session setup for the Stat.Kit tutorial, followed by the title banner and
- * the introductory paragraph. These first comments are written for the
- * carriage-return delimiter mode, so each one ends at the end of its line.
- * NOTE(review): set output error presumably silences ordinary command output
- * so that only display text and commands prefixed with noisily are shown.
- * Confirm against the Stata release this tutorial targets.
- set output error
- set display page 23
- set more 1
- #delimit ;
-
- * From here on statements end with a semicolon, so comments must too ;
- * Title banner, printed in white after 13 newlines ;
- di _n(13) in wh
- " ___ ____ ____ ____ ____ tm" _n
- " /__ / ____/ / ____/" _n
- "___/ / /___/ / /___/ Stat.Kit: Programmed Extensions to Stata" _n
- "-------------------------------------------------------------------------"
- _n(2) ;
-
- * Introductory paragraph, printed in green ;
- di in gr
- "Stat.Kit is a collection of programs written in Stata that provides a variety"
- _n
- "of statistical tests. Although Stat.Kit is written in Stata's programming"
- _n
- "language, you do not have to know anything about the programming language to"
- _n
- "use it. We begin by loading the kit:" _n(2) ;
-
-
- #delimit cr
- * Locate the companion tutorial files. The macro named path starts empty and
- * nullfile.tut is run as a probe from the current directory. On failure the
- * probe is retried with the DOS prefix and then with the Unix prefix that are
- * assigned below. The first prefix that works is left in path. If all three
- * attempts fail, an explanation is printed in red and the tutorial exits.
- mac def path
- capture run nullfile.tut
- if _rc {
- mac def path "\stata\"
- capture run %path`nullfile.tut
- if _rc {
- mac def path "/usr/stata/"
- capture run %path`nullfile.tut
- if _rc {
- #delimit ;
- di in red
- "I cannot find the other tutorial files. I have looked in the current" _n
- "directory and in \stata (DOS) or /usr/stata (Unix). Is Stata installed" _n
- "correctly?" _n(2)
- "In any case, I cannot run the tutorial." ;
- #delimit cr
- exit
- }
- }
- }
- * Bind function keys using the directory prefix found above. F5 runs
- * contents.tut and F6 runs statkit.tut.
- * NOTE(review): statkit.tut is presumably this tutorial itself, so F6 would
- * restart it until it is rebound near the end of the file. Confirm.
- macro define F5 "do %path`contents.tut;"
- macro define F6 "do %path`statkit.tut;"
-
- * Start from an empty data set with no value labels, then load Stat.Kit.
- drop _all
- label drop _all
-
- #delimit ;
- * Echo the command as if the user had typed it, then actually run it ;
- di in wh ". run %path`Stat.Kit" ;
- run %path`Stat.Kit ;
- * The set more 0, more, set more 1 sequence recurs after each screen ;
- * NOTE(review): presumably it pauses until the user presses a key. Confirm ;
- di ; set more 0 ; more ; set more 1 ;
-
- * Overview screen. Heading in white, then a two-column list of the fifteen ;
- * Stat.Kit commands with names in white at column 8 and descriptions in ;
- * green at column 20. The list is split across two di statements ;
- di _n(2) in wh
- "Overview" _n
- "--------" _n ;
-
- di in gr
- "Stat.Kit provides fifteen new commands:" _n(2)
- _col(8) in wh "dbeta" in gr _col(20)
- "calculate DF-Betas influence statistics" _n
- _col(8) in wh "genrank" in gr _col(20)
- "create ranks of variable accounting for ties" _n
- _col(8) in wh "genstd" in gr _col(20)
- "create standardized (mean 0, variance 1) variable" _n
- _col(8) in wh "glogit" in gr _col(20) "grouped logit" _n
- _col(8) in wh "gprobit" in gr _col(20) "grouped probit" _n
- _col(8) in wh "ksmirnov" in gr _col(20)
- "Kolmogorov-Smirnov equality-of-distributions test" ;
-
- di in gr
- _col(8) in wh "kwallis" in gr _col(20)
- "Kruskal-Wallis one-way analysis-of-variance test" _n
- _col(8) in wh "means" in gr _col(20)
- "arithmetic, geometric, and harmonic means" _n
- _col(8) in wh "ranksum" in gr _col(20)
- "Wilcoxon rank-sum (Mann-Whitney two sample) statistic" _n
- _col(8) in wh "regdw" in gr _col(20)
- in wh "regress" in gr " with Durbin-Watson statistic" _n
- _col(8) in wh "signrank" in gr _col(20)
- "Wilcoxon matched-pairs signed-ranks test" _n
- _col(8) in wh "signtest" in gr _col(20)
- "equality of medians for matched observations test" _n
- _col(8) in wh "spearman" in gr _col(20)
- "Spearman rank correlation coefficient" _n
- _col(8) in wh "teststd" in gr _col(20)
- "test equality of variances and against known constant" _n
- _col(8) in wh "ttest" in gr _col(20)
- "t-tests of all sorts" _n ;
-
- * Closing remark for the overview, then the end-of-screen more sequence ;
- di in gr
- "We will not be able to demonstrate them all here. At the conclusion of this"
- _n
- "tutorial, you might type '"
- in wh "help Stat.Kit" in gr "' to learn more about the commands." ;
- set more 0 ; more ; set more 1 ;
-
-
- * Demonstration-version note. Explains in display text only that ranksum ;
- * is not usable in the demonstration version. Nothing is executed here ;
- * apart from the end-of-screen more sequence ;
- di _n(2) in wh
- "Stata Demonstration Version Note" _n
- "--------------------------------" _n ;
-
- di in gr
- "We were forced to omit the "
- in wh "ranksum" in gr " command from the demonstration version of" _n
- "Stat.Kit. During the process of making its calculations, "
- in wh "ranksum" in gr " writes a" _n
- "temporary data set, something the demonstration version of Stata cannot do."
- _n
- "If you attempt to use "
- in wh "ranksum" in gr ", you will get the message '" in ye "not available in" _n
- "demonstration version" in gr "'." _n(2)
- "If you are using the Unix version of Stata in demonstration mode, do not use"
- _n
- "the "
- in wh "ranksum" in gr
- " command. Your version of Stat.Kit contains the real " in wh "ranksum" _n
- in gr
- "program, but it will not work in demonstration mode." _n(12) ;
- set more 0 ; more ; set more 1 ;
-
- * Using Stat.Kit screen. Describes the fuel-additive experiment, shows the ;
- * twelve before and after mileage pairs as two side-by-side tables, then ;
- * enters the same values into variables mpg0 and mpg1 ;
- di _n(2) in wh
- "Using Stat.Kit" _n
- "--------------" _n ;
-
- di in gr
- "Let's try some of the Stat.Kit commands. Before we can try them, however, we"
- _n
- "need a statistical problem, so consider the following:" _n(2)
- "You are testing the effectiveness of a new fuel additive. You run an exper-"
- _n
- "iment with 12 cars. You first run each without the fuel treatment and measure"
- _n
- "the mileage. You then add the treatment and repeat the experiment. The "
- "result" _n
- "of the experiment is:" _n ;
-
- * Display-only copy of the data. Headings and rules in green, values in ;
- * yellow. Left table begins near column 10 and right table near column 41 ;
- * NOTE(review): _col(12) appears twice in a row at the start of this ;
- * statement, and each 33-dash rule that starts at column 10 extends past ;
- * column 41 where the second rule is positioned. Confirm intended widths ;
- di in gr
- _col(12)
- _col(12) "Without With" _col(43) "Without With" _n
- _col(11) "Treatment Treatment" _col(42) "Treatment Treatment" _n
- _col(10) _dup(33) "-" _col(41) _dup(33) "-" _n in ye
- _col(15) "20 24" _col(46) "18 17" _n
- _col(15) "23 25" _col(46) "24 28" _n
- _col(15) "21 21" _col(46) "20 24" _n
- _col(15) "25 22" _col(46) "24 27" _n
- _col(15) "18 23" _col(46) "23 21" _n
- _col(15) "19 23" _n ;
-
- di in gr
- "Stata is now loading the data." _n ;
-
- * Enter the pairs shown above. The six rows of the left table come first ;
- * and the six rows of the right table follow. Keep both copies in step ;
- input mpg0 mpg1 ;
- 20 24;
- 23 25;
- 21 21;
- 25 22;
- 18 23;
- 17 18;
- 18 17;
- 24 28;
- 20 24;
- 24 27;
- 23 21;
- 19 23;
- end;
- set more 0 ; more ; set more 1 ;
-
- * Each demonstration step below follows one pattern. A boxed explanation ;
- * between two 79-dash rules, an echo of the command as the user would ;
- * type it, the command itself, then the end-of-screen more sequence ;
- * Step. Built-in summarize on mpg0 and mpg1, run with the noisily prefix ;
- di _n(9) in wh _dup(79) "-" _n in gr
- "Just because we have loaded Stat.Kit does not mean we cannot use all the" _n
- "other Stata commands, so let's first "
- in wh "summarize" in gr " the data. We have named the" _n
- "variables mpg0 and mpg1, mileage without and with the fuel treatment, respec-"
- _n
- "tively." _n
- in wh _dup(79) "-" _n(3)
- ". summarize" ;
- noisily summarize ;
- di ; set more 0 ; more ; set more 1 ;
-
- * Step. The Stat.Kit means command. It is run without the noisily prefix ;
- * NOTE(review): presumably Stat.Kit programs print through display and so ;
- * remain visible under set output error. Confirm ;
- di _n(2) in wh _dup(79) "-" _n in gr
- "Stat.Kit's "
- in wh "means" in gr
- " command will tell us the arithmetic, geometric, and harmonic"
- _n
- "means:" _n
- in wh _dup(79) "-" _n(2)
- ". means" ;
- means ;
- di _n ; set more 0 ; more ; set more 1 ;
-
-
- * t-test demonstrations on mpg0 and mpg1. Three runs of ttest follow. A ;
- * one-sample test against 20, a paired test, and an unpaired test. Each ;
- * run is followed by a boxed interpretation and the more sequence ;
- * One-sample test of mpg0 against 20 ;
- di _n(2) in wh _dup(79) "-" _n in gr
- "Let's test whether the arithmetic mean of mileage without the fuel treatment"
- _n
- "is 20 mpg:" _n
- in wh _dup(79) "-" _n(4)
- ". ttest mpg0=20" ;
- ttest mpg0=20 ;
-
- * The 23 percent figure in this text is hard-coded, not read from ttest ;
- di _n(4) in wh _dup(79) "-" _n in gr
- "We cannot reject the hypothesis that the mean is 20, at least at any signifi-"
- _n
- "cance level below 23%." _n
- in wh _dup(79) "-" _n(2) ;
- set more 0 ; more ; set more 1 ;
-
- * Paired test of mpg0 against mpg1 ;
- di _n(2) in wh _dup(79) "-" _n in gr
- "Let's test whether the mean mileages with and without the treatment are equal."
- _n
- "Since the same cars are used to test the effectiveness of the treatment, we" _n
- "use a paired t-test:" _n
- in wh _dup(79) "-" _n(2)
- ". ttest mpg0=mpg1, paired" ;
- ttest mpg0=mpg1, paired ;
-
- di _n(2) in wh _dup(79) "-" _n in gr
- "The means are different at the 5% level." _n
- in wh _dup(79) "-" _n ;
- set more 0 ; more ; set more 1 ;
-
- * Unpaired test of mpg0 against mpg1 ;
- di _n(2) in wh _dup(79) "-" _n in gr
- "If different cars had been used to test the effectiveness of the fuel treat-"
- _n
- "ment, the data would not be paired. In that case, we would use the (standard)"
- _n
- "unpaired t-test:" _n
- in wh _dup(79) "-" _n(2)
- ". ttest mpg0=mpg1" ;
- ttest mpg0=mpg1 ;
-
- di _n(2) in wh _dup(79) "-" _n in gr
- "We see that the means are not different at the 5% level in this case." _n
- in wh _dup(79) "-" _n ;
- set more 0 ; more ; set more 1 ;
-
- * Recap table. The t statistics and significance levels shown in yellow ;
- * are literal text and must be kept in step with the input data by hand ;
- di _n(2) in wh _dup(79) "-" _n in gr
- "Let's summarize. We obtained the following two results:" _n(2)
- _col(18) "method" _col(33) "t-statistic" _col(49) "significance level"_n
- _col(17) "--------" _col(33) "----------" _col(49) _dup(17) "-" _n
- _col(18) "paired" _col(35) in ye "-2.24" _col(55) "0.0463" in gr _n
- _col(17) "unpaired" _col(35) in ye "-1.43" _col(55) "0.1667" _n ;
-
- * Discussion of when each variant applies. The unequal option is only ;
- * mentioned in the text and is never executed ;
- di in gr
- "Only one of these results can be right. If the same cars were used with and"
- _n
- "without the fuel treatment, then you use the paired t-test. The question is"
- _n
- "not whether two independent samples share the same mean, but whether the same"
- _n
- "car yielded improved mileage when the fuel treatment was added." _n(2)
- "If 24 different cars were used, 12 without and another 12 with the fuel treat-"
- _n
- "ment, then you use the unpaired result. In this case you do want to test" _n
- "whether two independent samples share the same mean." _n(2)
- "The data is really for 12 cars, before-and-after. We just wanted to show you"
- _n
- "we could do it either way." _n(2)
- "If we typed '"
- in wh "ttest mpg0=mpg1, unequal" in gr
- "', Stata would perform the test for un-" _n
- "paired data without assuming that the variances are equal." _n
- in wh _dup(79) "-" _n ;
- set more 0 ; more ; set more 1 ;
-
-
- * Nonparametric tests on the same paired data. First signtest, then ;
- * signrank ;
- * For signtest the more sequence comes between the echoed command and ;
- * the command itself, so the explanation is read before results appear ;
- di _n(14) in wh _dup(79) "-" _n in gr
- "Next, we'll test whether the medians are the same using the "
- in wh "signtest" in gr " command." _n
- in wh "signtest" in gr
- " tests the equality of medians for matched pairs of observations. It"
- _n
- "does this by calculating the difference between the two variables. The hypo-"
- _n
- "thesis that the medians are equal is equivalent to the hypothesis that the" _n
- "median of the differences is zero. This, in turn, is equivalent to the hypo-"
- _n
- "thesis that the true proportion of positive (negative) signs is one-half."
- _n
- in wh _dup(79) "-" _n(4)
- ". signtest mpg0=mpg1" ;
- set more 0 ; more ; set more 1 ;
- signtest mpg0=mpg1 ;
- * The exact equality mentioned below is the 21 21 row of the input data ;
- di _n in wh _dup(79) "-" _n in gr
- "(There is one exact equality in the data, which "
- in wh "signtest" in gr " allocates equally to"
- _n
- "positive and negative.)" _n
- in wh _dup(79) "-" ;
- set more 0 ; more ; set more 1 ;
-
- * Wilcoxon matched-pairs signed-ranks test through signrank ;
- di _n(2) in wh _dup(79) "-" _n in gr
- "Let's test whether the distributions are the same using the Wilcoxon matched-"
- _n
- "pairs signed-ranks test:" _n
- in wh _dup(79) "-" _n(3)
- ". signrank mpg0=mpg1" ;
- signrank mpg0=mpg1 ;
-
- * The 5 percent conclusion in this text is hard-coded display text ;
- di _n(3) in wh _dup(79) "-" _n in gr
- "We find that the distributions are different at the 5% level." _n
- in wh _dup(79) "-" _n ;
- set more 0 ; more ; set more 1 ;
-
- * Switch data sets. Replaces the mileage data with the census file found ;
- * under the located directory prefix, keeps four variables, then shows ;
- * describe output and the first 20 observations on separate screens ;
- di _n(2) in wh _dup(79) "-" _n in gr
- "We now switch data sets. We are going to use state data from the 1980 Census"
- _n
- "which we have previously stored in the file called census.dta. We will only"
- _n
- "need a few variables from this data. We "
- in wh "describe" in gr " the data below and on the" _n
- "next screen we will " in wh "list" in gr " some of it for you." _n
- in wh _dup(79) "-" _n(2)
- ". use %path`census, clear" ;
- * The clear option discards the mileage data that is still in memory ;
- noisily use %path`census, clear ;
- di _n in wh ". keep state marriage medage pop18p" ;
- keep state marriage medage pop18p ;
- di _n in wh ". describe" ;
- noisily describe ;
- set more 0 ; more ; set more 1 ;
- di _n in wh ". list in 1/20" ;
- noisily list in 1/20 ;
- set more 0 ; more ; set more 1 ;
-
-
- * Marriage-rate analysis on the census data. Builds mrgrate, compares the ;
- * Pearson and Spearman correlations with medage, then tracks down the ;
- * outlier that explains the difference. Ends by clearing the data and ;
- * rebinding F6 ;
- di _n(2) in wh _dup(79) "-" _n in gr
- "We want to examine the marriage rate, which we define as the number of mar-" _n
- "riages divided by the state's population aged 18 years and over. First, we" _n
- in wh "generate"
- in gr " the variable and " in wh "summarize" in gr " it:" _n
- in wh _dup(79) "-" _n(2)
- ". generate mrgrate = marriage/pop18p" _n(2)
- ". summarize mrgrate" ;
- * The echoed text spells out generate while the executed line uses gen ;
- gen mrgrate=marriage/pop18p ;
- noisily summarize mrgrate ;
-
- * Pearson correlation through correlate, then Spearman through spearman ;
- di _n(2) in wh _dup(79) "-" _n in gr
- "We now examine the correlation of mrgrate with age. Stata's built-in "
- in wh "corre" in gr "-"
- _n in wh "late" in gr
- " command calculates the Pearson product-moment correlation coefficient." _n
- "Stat.Kit's "
- in wh "spearman" in gr
- " command calculates the Spearman rank correlation coef-" _n
- "ficient:" _n
- in wh _dup(79) "-" _n(2)
- ". correlate mrgrate medage" ;
- set more 0 ; more ; set more 1 ;
- noisily correlate mrgrate medage ;
- di _n in wh ". spearman mrgrate medage" ;
- noisily spearman mrgrate medage ;
- set more 0 ; more ; set more 1 ;
-
- * The two coefficients quoted in yellow are literal text, not values ;
- * read back from the commands above ;
- di _n(2) in wh _dup(79) "-" _n in gr
- "We find that the standard (Pearson) correlation coefficient between mrgrate"
- _n
- "and medage is "
- in ye "-0.0177" in gr ", whereas the Spearman rank correlation coefficient is"
- _n
- in ye "-0.5551." in gr " Let's find out why:" _n
- in wh _dup(79) "-" _n(2)
- ". summarize mrgrate, detail" ;
- noisily summarize mrgrate, detail ;
- set more 0 ; more ; set more 1 ;
-
- * The two rates quoted in yellow are likewise literal text ;
- di _n in wh _dup(79) "-" _n in gr
- "One state has a marriage rate of "
- in ye "0.1955" in gr ", whereas the second highest rate in" _n
- "our data is " in ye "0.0247" in gr "!" _n
- in wh _dup(79) "-" _n ;
- set more 0 ; more ; set more 1 ;
-
- * List observations above the quoted second-highest rate. The threshold ;
- * .0247 is the same literal used in the text above ;
- di _n in wh ". list if mrgrate>.0247" ;
- noisily list if mrgrate>.0247 ;
- di _n(2) in wh _dup(79) "-" _n in gr
- "Not surprisingly, the state with the highest marriage rate is Nevada." _n
- in wh _dup(79) "-" _n(7) ;
- * Clean up the data and labels, and point F6 at graphkit.tut ;
- drop _all ;
- label drop _all ;
- macro define F6 "do %path`graphkit.tut;" ;
- set more 0 ; more ; set more 1 ;
-
- * Closing screen. Tells the user what F5 and F6 now do, matching the key ;
- * bindings made earlier in this file, then runs tobuy.tut from the ;
- * located directory prefix ;
- di _n(4) in white
- "Demonstration ends" _n
- "------------------" _n ;
-
-
- di in green
- "That concludes our short demonstration, but there's much more. We now return"
- _n
- "control to you. Some suggestions:" _n ;
-
- di in green
- "If you ..." _col(34) "Then we will show you ..." _n
- " Press " in white "F5" in green _col(38) "a table of tutorial contents" _n
- " Press " in white "F6" in green _col(38) "the next tutorial, "
- in white "graphkit.tut" _n ;
-
- * NOTE(review): the contents of tobuy.tut are not visible from this file ;
- run %path`tobuy.tut ;